# Import libraries
library(plotly)
library(heatmaply)
library(chromVAR)
library(motifmatchr)
library(BSgenome.Hsapiens.UCSC.hg19)
library(SummarizedExperiment)
library(BuenColors)
library(chromVARxx)
library(chromVARmotifs)
library(BiocParallel)
register(MulticoreParam(2)) # adjust according to your machine
peakfile <- "../data/peaks/heme_peaks.bed"
peaks <- getPeaks(peakfile)
bamfiles <- list.files("../data/bams", full.names = TRUE)
raw_counts <- getCounts(bamfiles, peaks, paired = TRUE, by_rg = TRUE, format = "bam",
colData = DataFrame(source = bamfiles))
# discuss paired, by_rg, etc.
counts_filtered <- filterSamples(raw_counts, min_depth = 500,
min_in_peaks = 0.15, shiny = FALSE)
counts <- filterPeaks(counts_filtered)
dim(raw_counts)
## [1] 590650 16
dim(counts_filtered)
## [1] 590650 12
dim(counts)
## [1] 15518 12
counts <- addGCBias(counts, genome = BSgenome.Hsapiens.UCSC.hg19)
data("human_pwms_v1") # also mouse_pwms_v1
motif_ix <- matchMotifs(human_pwms_v1, counts, genome = BSgenome.Hsapiens.UCSC.hg19)
kmer_ix <- matchKmers(5, counts, genome = BSgenome.Hsapiens.UCSC.hg19)
dim(motif_ix)
## [1] 15518 1764
dim(kmer_ix)
## [1] 15518 512
dev <- computeDeviations(object = counts, annotations = motif_ix)
variabilityAll <- computeVariability(dev)
plotVariability(variabilityAll, use_plotly = TRUE)
Figure 1a; variability across motifs
variabilityBagged <- computeVariability(bagged)
plotVariability(variabilityBagged, use_plotly = TRUE)
Figure 1b; variability across reasonably unique motifs
mostvariable <- tail(sort(variabilityAll$variability, index.return = TRUE)$ix,30)
m <- assay(dev)[mostvariable,]
heatmaply(m, colors = jdb_palette("flame_light"))
Figure 2a; motifs x samples
mostvariable <- tail(sort(variabilityBagged$variability, index.return = TRUE)$ix,30)
m <- assay(bagged)[mostvariable,]
heatmaply(m, colors = jdb_palette("flame_light"))
Figure 2b; bagged motifs x samples